*combine lottery file with baseline variables, school attendance, and outcome data


cd "$data"

		* Start from the wide applicant file and keep the analysis sample:
		* application grade above 3 (a missing gradeapp is retained, exactly as
		* with "drop if gradeapp<=3") and projected 12th-grade year of 2017 or earlier.
		use "$data_clean/all_applicants_wide", clear
		keep if gradeapp > 3 & proj_year12 <= 2017

		* Remove fully identical records, then report how many sasid values still repeat.
		duplicates drop
		count
		duplicates report sasid

		* Force one record per sasid so the 1:1 merges further down are valid.
		* Which record survives depends on the current sort order.
		duplicates drop sasid, force // 2 dups -- come back to this
		
		
******************************************************************
**** Merge on outcomes and baseline variables ****
******************************************************************
		* Every file below is keyed on sasid and merged 1:1 in the listed order.
		* keep(1 3) retains all applicants (matched or not) and discards
		* using-only records; nogen suppresses the _merge variable.
		*
		* Note on SIMS school attendance: some applicants do not match because,
		* although they have a sasid, their attendance at a school was either
		* zero or blank, or their attendance is from a grade prior to potential
		* charter years.
		*
		* Note on voting: as of July 19, 2021 the file includes OOS voters, as of
		* Jan 28, 2022 it includes 2020 voting, and as of April 2022 it includes
		* PA and a data cleaning fix.
		foreach src in ///
			"$data/baselinedemos_voter.dta"   /// SIMS baseline
			"$data/sims_voter_wide.dta"       /// SIMS school attendance
			"$data_clean/mcas_wide.dta"       /// MCAS
			"$data_clean/ap.dta"              /// AP
			"$data_clean/sat.dta"             /// SAT
			"$data/hsgrad_voting.dta"         /// high school graduation
			"$data/flat_degrees_NSC.dta"      /// NSC degrees
			"$data/flat_enrollment_NSC.dta"   /// NSC enrollment
			"$data/sims_voting_all.dta"       /// voting
			"$data_clean/ssdr_wide"           /// SSDR
			{
			merge 1:1 sasid using "`src'", keep(1 3) nogen
		}

		* Drop every variable whose name ends in 2020 through 2023.
		drop *2020 *2021 *2022 *2023

	* Numeric college identifier used as the key for the Chetty college file.
	g opeid = ID_FSC_firstinst  
	destring opeid, replace

	*College characteristics from Chetty paper
	* The path uses a forward slash like every other path in this file; the
	* previous backslash separator only resolved on Windows.
	* keep(1 3) keeps all students; force allows a string/numeric type mismatch
	* on the key without an error.
	merge m:1 opeid using "$data_clean/chetty_college.dta", nogen  force keep(1 3) ///
		keepusing(k_mean k_mean_m k_mean_f opeid superopeid institution_name superopeid_name *mean*insuff* *mean*never* tier) 
		* NOTE(review): both renames map a variable onto its own name, so they
		* change nothing as written -- confirm whether a different target name was intended.
		ren institution_name institution_name
			ren superopeid_name superopeid_name
	
	*spread insufficient and never attend outcomes
	* Each *_insuff* / *_never* variable is overwritten on every row with its
	* sample-wide maximum, so the value is available for all students.
	* (presumably the value is constant wherever it is non-missing -- TODO confirm)
	foreach v of varlist *_insuff* *_never* {
		egen max=max(`v') 
		replace `v'=max
		drop max
	}
	
	compress
	
	*** replace k_mean for those who don't attend college
	***** note there are a few colleges that don't match and those get a different number	  
	* Students with a college id but no k_mean get the "insufficient data" value.
	replace k_mean = k_mean_insuff if k_mean==.&opeid!=. // insufficient data college

	* Gender-specific version: male value when baselinefemale==0, female value
	* when baselinefemale==1; it stays missing when baselinefemale is missing.
	g k_mean_bygender=k_mean_m if baselinefemale==0
	replace k_mean_bygender=k_mean_f if baselinefemale==1
	replace k_mean_bygender = k_mean_m_insuff if  k_mean_bygender==.&opeid!=.&baselinefemale==0 // insufficient data college
	replace k_mean_bygender =  k_mean_f_insuff if  k_mean_bygender==.&opeid!=.&baselinefemale==1 // insufficient data college
	
		* Anything still missing is assigned the "never attend" value.
		replace k_mean= k_mean_never if k_mean==. // do not attend
		replace k_mean_bygender = k_mean_m_never if  k_mean_bygender==. & baselinefemale==0 // do not attend
		replace k_mean_bygender = k_mean_f_never if  k_mean_bygender==. & baselinefemale==1 // do not attend
	
		*outcomes are processed more below
		
		
******************************************************************
**** Lottery setup ****
* Baseline variables
* Endogenous variables
* Risk sets
******************************************************************

	*baseline
	* Rebuild the baseline grade as the grade immediately before the application grade.
	drop baselinegrade
	generate byte baselinegrade = gradeapp - 1

	*baseline SCORES (math first, then ELA, to keep the variable order)
	foreach subj in m e {
		generate baseline_`subj' = .
	}

	* NEW 3/23/2015 EMS - The most accurate way to define baseline is the application
	* year rather than the grade before application. That way, if a student applies
	* to multiple grades (as with the PK and Kindergarten applicants) or is held back,
	* the proper baseline scores are used.
			* Differences are extremely minor; going with this method.
	forvalues gr = 4/8 {
		foreach subj in m e {
			replace baseline_`subj' = c_state_`subj'rawsc`gr' if yearapp == mcasyear`gr'
		}
	}

	*fixes for mcas testing (applications in 2005 or earlier)
	* ELA: baseline grade 8 takes the grade-7 score; baseline grades 5-7 take the grade-4 score.
	* Math: baseline grades 5-7 take the grade-4 score.
	replace baseline_e = c_state_erawsc7 if baselinegrade == 8 & yearapp <= 2005
	replace baseline_e = c_state_erawsc4 if inrange(baselinegrade, 5, 7) & yearapp <= 2005
	replace baseline_m = c_state_mrawsc4 if inrange(baselinegrade, 5, 7) & yearapp <= 2005

	* Indicators for having a baseline score in each subject.
	foreach subj in m e {
		generate hasbaseline_`subj' = (baseline_`subj' != .)
	}

	* Year-of-birth dummies yobdum1, yobdum2, ...
	quietly tabulate baselineyob, generate(yobdum)
	
	
	* Outcome scores taken from the grade after the application grade
	* (gradeapp + 1), plus proficient (>=240) and advanced (>=260) indicators
	* based on the scaled score from that same grade.
	foreach newvar in twoyearsout_e twoyearsout_m twoyearsout_grade ///
		two_prof_e two_prof_m two_adv_e two_adv_m {
		generate `newvar' = .
	}

	foreach appgr of numlist 4 5 6 7 9 {
		local outgr = `appgr' + 1
		replace twoyearsout_grade = `outgr' if gradeapp == `appgr'
		foreach s in e m {
			replace twoyearsout_`s' = c_state_`s'rawsc`outgr' if gradeapp == `appgr'
			* Indicators are only defined where the scaled score is not system-missing.
			replace two_prof_`s' = (`s'scaleds`outgr' >= 240) if `s'scaleds`outgr' != . & gradeapp == `appgr'
			replace two_adv_`s'  = (`s'scaleds`outgr' >= 260) if `s'scaleds`outgr' != . & gradeapp == `appgr'
		}
	}

	
	* post lotto year
	g  postlottoyear=yearapp+1

	* School code observed in the application grade, used below for clustering.
	* Stored as long: the default float type cannot represent 8-digit codes
	* exactly (e.g. 35040505 would be rounded), which could merge distinct
	* schools into one cluster.
	* Grade 7 is included because the rest of this file treats gradeapp==7 as
	* a valid application grade; previously those applicants all fell into 99999.
	g long postlottomasscode=.
	foreach gr in 4 5 6 7 9 {
		replace postlottomasscode=masscode`gr' if gradeapp==`gr'
	}
		replace postlottomasscode=99999 if postlottomasscode==. //if no masscode, can stay in data for clustering

	qui tab postlottoyear, ge(yeardum) //revisited because prior version conditioned on present in 10th grade

*****************RISK SETS *******************************
	*Risk sets based on lotteried sample schools
	*DEFINE STUDY SAMPLE HERE -- right now all lotteried charters
			*APR 4120530
			*BosCol 4490305
			*Boston Green Academy 4110305
			*Boston Prep 4160305
			*CoaH  4370505 and CoaHII  35040505 
			*Codman 4380505
			*EdBrooke (not 2 and 3) 4280305
			*Excel (not 3) 4100205
			*Match HS 4690505
			*Match MS 4690505
			*RoxPrep (not Uncommon or Grove Hall or DP) 4840505

	* A risk set is one distinct combination of the listed variables.
	* Each egen group() is followed by a full set of risk-set dummies.
	* The variable order is preserved because it fixes the group numbering.
	local bos_apply applyAPR applyBGA applyBosPrep applyBosCol ///
		applyMATCH_HS applyMATCH_MS applyCodman applyCoaH applyCoaHII applyExcel applyEdBrooke ///
		applyRoxPrep
	* applyAPR also appears in bos_apply; the repeat does not change the grouping.
	local nonbos_apply applyAPR applyGlobal applyPVPA applySturgis ///
		applyCapeCod applyInnov applyParker ///
		applyFourRiv applyMarble applyKIPPLynn applyRisingTide ///
		applySalemAc

*BOSTON
	egen lottogroupYR_boston = group(yearapp applyprioritygroup `bos_apply')
	quietly tabulate lottogroupYR_boston, generate(db_)

*LTO SAMPLE  (restrict to schools in LTO)
	egen lottogroupYR_lto = group(yearapp applyAPR applyBosPrep applyBosCol ///
		applyMATCH_HS applyCodman applyCoaH)
	quietly tabulate lottogroupYR_lto, generate(dl_)

*STATE SAMPLE
	* risk_* refers to lotteries with multiple batches
	egen lottogroupYR_state = group(yearapp applyprioritygroup `bos_apply' `nonbos_apply' risk_*)
	quietly tabulate lottogroupYR_state, generate(ds_)

	* Sample-membership flags: 1 if the student applied to at least one school
	* in the corresponding list, 0 otherwise.
	local lto_schools applyAPR applyBosPrep applyBosCol applyMATCH_HS applyCodman applyCoaH
	local bos_schools applyAPR applyBGA applyBosPrep applyBosCol applyRoxPrep ///
		applyMATCH_HS applyMATCH_MS applyCodman applyCoaH applyCoaHII applyExcel applyEdBrooke
	local state_only applyGlobal applyPVPA applySturgis ///
		applyCapeCod applyInnov applyParker ///
		applyFourRiv applyMarble applyKIPPLynn applyRisingTide ///
		applySalemAc

	generate lto_sample = 0
	foreach appvar of local lto_schools {
		replace lto_sample = 1 if `appvar' == 1
	}

	generate boston_sample = 0
	foreach appvar of local bos_schools {
		replace boston_sample = 1 if `appvar' == 1
	}

	* The state sample is the Boston list plus the non-Boston schools.
	generate state_sample = 0
	foreach appvar in `bos_schools' `state_only' {
		replace state_sample = 1 if `appvar' == 1
	}

	
	*CREATE CLUSTER VARIABLE
	egen schoolXyear=group(postlottoyear postlottomasscode) //revisited because prior version conditioned on present in 10th grade
			* LTO cluster: 10th-grade year x school cell, falling back to the
			* 9th-grade cell when the 10th-grade cell is undefined.
			egen schoolXyearLTO=group(year10 masscode10) 
			qui tab year10, gen(yearLTOdum)
			egen blah= group(year9 masscode9)
			* egen group() numbers each grouping from 1, so the fallback ids are
			* shifted past the largest 10th-grade id. Without the shift, a 9th-grade
			* cell would share its cluster id with an unrelated 10th-grade cell.
			qui su schoolXyearLTO, meanonly
			local lto_offset = cond(r(N) > 0, r(max), 0)
			replace schoolXyearLTO=`lto_offset'+blah if schoolXyearLTO==. & blah!=.
			drop blah

	* Years in charter, by charter type, counted from the application grade.
	* If a student is in a charter middle school and then applies to a charter high
	* school, only the middle school lottery is considered, but all charter years are
	* counted (including high school years). This works because the earliest grade
	* application is the one kept for each student.
	*
	* For each charter type `b' the loop builds:
	*   `b'years        grades gradeapp..12 plus repeats gradeapp..12
	*   `b'yearsby10/11/12  grades gradeapp..G plus repeats gradeapp..G-1
	*   `b'yearsby2     application grade, the next grade, and repeats of the application grade
	*   `b'yearsLTO     grades 9 and 10 plus repeats of grade 9 (all students)
	* and the matching ever_* indicators (total >= 1).
	* The grade-specific pieces (`b'years9, `b'years4by10, ...) are kept in the data.
	foreach b in blottocharter bltocharter urbancharter nonurbancharter bcharter charter_notbos {

		* ---- application grade through grade 12 ----
		foreach g0 in 9 5 6 7 4 {
			local terms
			forvalues k = `g0'/12 {
				local terms `terms' `b'`k' `b'_repeats`k'
			}
			egen `b'years`g0' = rowtotal(`terms') if gradeapp == `g0'
		}
		generate `b'years = `b'years9
		foreach g0 in 4 5 6 7 {
			replace `b'years = `b'years`g0' if `b'years`g0' != .
		}

		* ---- application grade through grade 10, 11 and 12 ----
		foreach last in 10 11 12 {
			local lastrep = `last' - 1
			foreach g0 in 9 4 5 6 7 {
				local terms
				forvalues k = `g0'/`last' {
					local terms `terms' `b'`k'
				}
				forvalues k = `g0'/`lastrep' {
					local terms `terms' `b'_repeats`k'
				}
				egen `b'years`g0'by`last' = rowtotal(`terms') if gradeapp == `g0'
			}
			generate `b'yearsby`last' = `b'years9by`last'
			foreach g0 in 4 5 6 7 {
				replace `b'yearsby`last' = `b'years`g0'by`last' if `b'years`g0'by`last' != .
			}
		}

		* ---- two years after lottery ----
		foreach g0 in 9 4 5 6 7 {
			local nxt = `g0' + 1
			egen `b'years`g0'by2 = rowtotal(`b'`g0' `b'`nxt' `b'_repeats`g0') if gradeapp == `g0'
		}
		generate `b'yearsby2 = `b'years9by2
		foreach g0 in 4 5 6 7 {
			replace `b'yearsby2 = `b'years`g0'by2 if `b'years`g0'by2 != .
		}

		* ---- grades 9-10 regardless of application grade ----
		egen `b'yearsLTO = rowtotal(`b'9 `b'10 `b'_repeats9)

		* ---- ever-attended indicators ----
		generate byte ever_`b' = `b'years >= 1 & `b'years != .
		foreach sfx in by10 by11 by12 by2 {
			generate byte ever_`b'`sfx' = `b'years`sfx' >= 1 & `b'years`sfx' != .
		}
		generate byte ever_`b'LTO = `b'yearsLTO >= 1 & `b'yearsLTO != .
	}
	
*alternative construction of endogenous variables using school by school info
*these are based on ever touching the school, not number of years

*Switch to school code variables Offers and Apply
* For each of offer / apply / initial_offer, create one 0/1 dummy per school,
* named by school code. The two lists below are parallel: the i-th name maps
* to the i-th code. Both Match schools share code 4690505 at this point.
			local schools EdBrooke Excel RoxPrep APR BosCol BosPrep BGA CoaH CoaHII Codman
			local codes   4280305 4100205 4840505 4120530 4490305 4160305 4110305 4370505 35040505 4380505
			foreach kind in offer apply initial_offer {
				forvalues i = 1/10 {
					local sch  : word `i' of `schools'
					local code : word `i' of `codes'
					generate `kind'_`code' = (`kind'`sch' == 1)
				}
				generate `kind'_4690505 = (`kind'MATCH_HS == 1 | `kind'MATCH_MS == 1)
			}
			
*school types
* Lists of school codes used by the "foreach m of global ..." loops below.
* civic / notcivic / LTO use the split Match codes (4690509 HS, 4690506 MS);
* masscodes_insample still uses the combined Match code 4690505.
global civic 4100205 /*Excel*/ 4840505  /*Roxprep*/   4120530  /*APR*/  4110305 /*BGA*/ 4370505 /*Coah*/ 35040505 /*CoahII*/
global notcivic  4280305 /*Brooke*/ 4490305 /*BosCol*/ 4160305 /*BosPrep*/ 4380505 /*Codman*/ 4690509 /*Match HS*/ 4690506	/*match MS*/	
global LTO 4120530  /*APR*/  4370505 /*Coah*/ 4490305 /*BosCol*/ 4160305 /*BosPrep*/ 4380505 /*Codman*/ 4690509 /*Match HS*/

* Upper grade bound for the "within two years" attendance window used below.
* NOTE(review): this is baselinegrade+3, whereas twoyearsout_grade above is
* gradeapp+1 (= baselinegrade+2) -- confirm the extra grade is intended.
g twoyearsoutgrade = baselinegrade +3

global masscodes_insample 4120530  4490305  4110305 4160305 4370505 35040505 4380505 4280305 4100205 4690505 4840505

	* Setup for charter years by school:
				*only counting years when have applied to that school
	* For every in-sample school code `m' this creates
	*   schl_`m'_`n'     1/0 for being enrolled at `m' in grade `n' (missing if masscode`n' is missing)
	*   ever_<window>_`m' 1 if the student applied to `m' and was enrolled there in
	*                    some grade above the baseline grade inside the window.
	* NOTE(review): grades start at 5, so grade-4 enrollment is never counted
	* for gradeapp==4 applicants -- confirm this is intended.
	foreach m of global masscodes_insample {
		foreach window in within2 by9 by10 by11 by12 LTO {
			generate ever_`window'_`m' = 0
		}
		forvalues n = 5/12 {
			generate schl_`m'_`n' = (masscode`n' == `m') if masscode`n' != .

			* enrolled at `m' in grade `n', after baseline, and applied to `m'
			local hit schl_`m'_`n' == 1 & `n' > baselinegrade & apply_`m' == 1

			*count before test w/in two years -- should keep MS and HS separate
			replace ever_within2_`m' = 1 if `hit' & `n' <= twoyearsoutgrade

			*count through 9th, 10th, 11th and 12th grade
			foreach cap in 9 10 11 12 {
				if `n' <= `cap' {
					replace ever_by`cap'_`m' = 1 if `hit'
				}
			}

			*count 9th/10th grade only, as in Angrist et al. 2016
			if inrange(`n', 9, 10) {
				replace ever_LTO_`m' = 1 if `hit'
			}
		}
	}
			
			*divide match into two school codes
			* The combined Match dummy (4690505) is split by baseline grade:
			* 4690506 (middle school) when baselinegrade is 4 or 5,
			* 4690509 (high school) when baselinegrade is 8.
			* Both are 0 otherwise; the combined variable is then dropped.
			foreach vartype in offer apply initial_offer ever_within2 ever_by9 ever_by10 ever_by11 ever_by12 ever_LTO {
				generate `vartype'_4690506 = (`vartype'_4690505 == 1 & inlist(baselinegrade, 4, 5))
				generate `vartype'_4690509 = (`vartype'_4690505 == 1 & baselinegrade == 8)
				drop `vartype'_4690505
			}

* Same school list as masscodes_insample, with Match split into its two codes.
global masscodes_insample2 4120530  4490305  4110305 4160305 4370505 35040505 4380505 4280305 4100205  4840505  4690509 4690506

	* Any-school attendance (D_*) and offer indicators, pooled over schools.

	* --- all in-sample schools ---
	foreach window in within2 by9 by10 by11 by12 {
		generate D_`window' = 0
	}
	generate initial_offer_s = 0
	generate offer_s = 0
	foreach m of global masscodes_insample2 {
		foreach window in within2 by9 by10 by11 by12 {
			replace D_`window' = 1 if ever_`window'_`m' == 1
		}
		replace initial_offer_s = 1 if initial_offer_`m' == 1
		replace offer_s = 1 if offer_`m' == 1
	}

	* --- school subsets: civic, notcivic and LTO (lists held in globals of the same name) ---
	* D_civic and D_notcivic use attendance through grade 12; D_LTO uses the
	* 9th/10th-grade (LTO) attendance definition.
	foreach grp in civic notcivic LTO {
		local window by12
		if "`grp'" == "LTO" {
			local window LTO
		}
		generate D_`grp' = 0
		generate initial_offer_`grp' = 0
		generate offer_`grp' = 0
		foreach m of global `grp' {
			replace D_`grp' = 1 if ever_`window'_`m' == 1
			replace initial_offer_`grp' = 1 if initial_offer_`m' == 1
			replace offer_`grp' = 1 if offer_`m' == 1
		}
	}

	* --- waitlist offer: any offer, set to 0 when there was an initial offer ---
	foreach grp in s civic notcivic LTO {
		generate waitlist_offer_`grp' = offer_`grp'
		replace waitlist_offer_`grp' = 0 if initial_offer_`grp' == 1
	}
	

	
	
******************************************************************
**** Create/process outcomes as needed ****
******************************************************************


*ontime grade progression based on 9th grade
* ontime`gr' = 1 when the year observed in grade `gr' is no later than the
* 9th-grade year plus (gr - 9); it is 0 when year`gr' is missing.
* NOTE(review): when year9 is missing the comparison is against a missing
* value, so any non-missing year`gr' counts as on time -- confirm intended.
forvalues gr = 9/12 {
	local lag = `gr' - 9
	generate ontime`gr' = (year`gr' <= (year9 + `lag')) & year`gr' != .

	*restrict repeaters to ontime in grade
	* repeat`gr' starts at 0 for on-time students (missing otherwise) and is
	* set to 1 for anyone with at least one recorded repeat of that grade.
	generate byte repeat`gr' = 0 if ontime`gr' == 1
	replace repeat`gr' = 1 if repeats`gr' >= 1 & repeats`gr' != .
}

*repeat any HS grade (defined only when all four grade-level flags are non-missing)
generate byte repeatany = inlist(1, repeat9, repeat10, repeat11, repeat12) ///
	if (repeat9 != . & repeat10 != . & repeat11 != . & repeat12 != .)

*Process SAT
	* Combined verbal + math score and a test-taking flag based on the math score.
	generate satreason = satvrecn + satmrecn
	generate took_sat = satmrecn != . //note no writing in class of 2017

	* Threshold indicators: above_<tier>_<test> = 1 if the score is at or above
	* the tier cutoff and non-missing. The cutoff lists are parallel to `sat_tests'.
	local sat_tests  satvrecn satmrecn satw satreason sattot
	local cut_lowq   430 440 420 880 1310
	local cut_median 500 520 500 1020 1520
	local cut_thirdq 580 600 580 1170 1750
	foreach tier in lowq median thirdq {
		forvalues i = 1/5 {
			local t : word `i' of `sat_tests'
			local c : word `i' of `cut_`tier''
			generate above_`tier'_`t' = `t' >= `c' & `t' != .
		}
	}

	* Blank out all SAT variables for cohorts before 2007.
	foreach v of varlist took_sat satreason sat* above* {
		replace `v' = . if proj_year < 2007
	}

	* NOTE(review): this rename maps took_sat onto itself and changes nothing as written.
	rename took_sat took_sat
	rename sattot score_sattot

	* Per-test taking flag and a score_ copy of each component.
	foreach v of varlist satvrecn satmrecn satw satreason {
		generate took_`v' = `v' != .
		generate score_`v' = `v'
	}
	
	***************
	*ALL AP TESTS *
	***************
	local aptest1 "ushistgr arthisgr artstdgr biolgr chemgr chinesgr comscagr comscbgr econmigr econmagr englangr englitgr envscigr eurhisgr frnlangr frenltgr gerlagr govusgr govcomgr italgr"
	local aptest2 "japangr latinvgr latincgr calcabgr calcbcgr musictgr physbgr physmgr physemgr psychgr spanlagr spanltgr statgr humgeogr art3dgr wdhistgr artst2gr"

	* For every AP exam:
	*   1. a missing score becomes 0
	*   2. took_ap_<exam> = 1 when the score is above 0 and below 6
	*   3. D.SUN 4-15-2013: slugging definition changed -- a score of 1 is recoded to 0
	*      (the took_ flag is set before the recode, so it still counts the exam)
	foreach exam in `aptest1' `aptest2' {
		replace ap_`exam' = 0 if ap_`exam' == .
		generate took_ap_`exam' = (ap_`exam' > 0 & ap_`exam' < 6)
		replace ap_`exam' = 0 if ap_`exam' == 1 & took_ap_`exam' == 1
	}

	*GENERATE NEW VARS
	* Any exam taken, number of exams taken, and best score across ap_* variables.
	egen took_any = rowmax(took_ap_*)
	egen N_aps_taken = rowtotal(took_ap_*)
	egen max_ap_taken = rowmax(ap_*)

	* above_s<i>_any = 1 when the best score is at least i.
	forvalues i = 1/5 {
		generate above_s`i'_any = (max_ap_taken >= `i')
	}

	* Sum of (recoded) scores; forced to 0 for students with no exam.
	egen sum_ap = rowtotal(ap_*)
	replace sum_ap = 0 if took_any == 0
	count if sum_ap == 0
	summarize sum_ap if took_any == 1

	drop max_ap_score n_ap

	* Subject composites: for each subject group, ap_<group> is the best score
	* across the group's exams and took_ap_<group> flags taking any of them.
	* Every variable is referenced by its full name; the economics block
	* previously relied on variable-name abbreviation (ap_econmi for
	* ap_econmigr), which fails under "set varabbrev off" or as soon as another
	* variable shares the prefix.

	*****************
	* CALCULUS 
	* AB / BC 
	*****************
	local calc "calcabgr calcbcgr"
	foreach x in `calc' {
		replace ap_`x' = 0 if took_ap_`x' == 0
	}

	egen ap_calc = rowmax(ap_calcabgr ap_calcbcgr)
	egen took_ap_calc = rowmax(took_ap_calcabgr took_ap_calcbcgr)

	
	*****************
	* US
	* hist or gov 
	*****************
	local us "ushistgr govusgr"
	foreach x in `us' {
		replace ap_`x' = 0 if took_ap_`x' == 0
	}

	egen ap_ushistgov = rowmax(ap_ushistgr ap_govusgr)
	egen took_ap_ushistgov = rowmax(took_ap_ushistgr took_ap_govusgr)
	
	*****************
	*economics
	* micro or macro 
	*****************
	local econ "econmigr econmagr"
	foreach x in `econ' {
		replace ap_`x' = 0 if took_ap_`x' == 0
	}

	egen ap_econ = rowmax(ap_econmigr ap_econmagr)
	egen took_ap_econ = rowmax(took_ap_econmigr took_ap_econmagr)
	
	*****************
	* ENGLISH
	* LITER/LANGUAGE 
	*****************
	local engl "englitgr englangr"
	foreach x in `engl' {
		replace ap_`x' = 0 if took_ap_`x' == 0
	}

	egen ap_engl = rowmax(ap_englitgr ap_englangr)
	egen took_ap_engl = rowmax(took_ap_englitgr took_ap_englangr)
	
	*****************
	* SPANISH
	* LITER/LANGUAGE 
	*****************
	local span "spanltgr spanlagr"
	foreach x in `span' {
		replace ap_`x' = 0 if took_ap_`x' == 0
	}

	egen ap_span = rowmax(ap_spanltgr ap_spanlagr)
	egen took_ap_span = rowmax(took_ap_spanlagr took_ap_spanltgr)
	
	*****************
	* ANY FOREIGN LANGUAGE
	* LITER/LANGUAGE 
	*****************
	*nobody takes  ap_latincgr ap_frenltgr 
	* (those two are skipped in the zero-fill loop but still included in the row maxima)
	local lang " ap_chinesgr ap_frnlangr  ap_gerlagr ap_italgr ap_japangr ap_latinvgr  ap_spanlagr ap_spanltgr"
	foreach x in `lang' {
		replace `x' = 0 if took_`x' == 0
	}

	egen ap_language = rowmax( ap_chinesgr ap_frnlangr ap_frenltgr ap_gerlagr ap_italgr ap_japangr ap_latinvgr ap_latincgr ap_spanlagr ap_spanltgr)
	egen took_ap_language = rowmax( took_ap_chinesgr took_ap_frnlangr took_ap_frenltgr took_ap_gerlagr took_ap_italgr took_ap_japangr took_ap_latinvgr took_ap_latincgr took_ap_spanlagr took_ap_spanltgr)
	 

	*****************
	*THE CATEGORIES *
	*****************
	* Science composite: took-any flag, number taken, best score and score sum
	* across the science exams listed below.
	local science "ap_biolgr ap_chemgr ap_comscagr ap_comscbgr ap_envscigr ap_physbgr ap_physmgr ap_physemgr" 

	* Build the matching list of took_ variables.
	local took_science ""
	foreach x in `science' {
		local took_science "`took_science' took_`x'"
	}

	foreach x in `science' {
		replace `x' =0 if took_`x'==0
	}
	egen took_science = rowmax(`took_science')
	egen N_science_taken = rowtotal(`took_science')
	egen max_science_taken = rowmax(`science')
	egen sum_science = rowtotal(`science')
	g ap_science=max_science_taken

	
	*****************
	*THE TOP 3 MOST *
	*POPULAR FILES  *
	*****************
	* "stat" is a short label only: the underlying variables are ap_statgr and
	* took_ap_statgr. They are named in full here so the code does not depend
	* on variable-name abbreviation.
	local thebig3subs "calc ushistgr engl stat"
	foreach x in `thebig3subs' {
		local v = cond("`x'" == "stat", "statgr", "`x'")
		replace ap_`v' =0 if took_ap_`v'==0
		}

	* Score-threshold indicators (>=2, >=3, >=4, >=5) for each subject or composite.
	foreach x in	calc ushistgr engl science span  ushistgov econ  stat language{
		local score ap_`x'
		if "`x'" == "stat" {
			local score ap_statgr
		}

		gen above_s2_`x' = (`score' >= 2)
		gen above_s3_`x' = (`score' >= 3)
		gen above_s4_`x' = (`score' >= 4)
		gen above_s5_`x' = (`score' >= 5)

	}
	 
	 * Shorten took_ap_<x> to took_<x>.
	 ren took_ap_* took_*

	
	* Blank out AP variables for cohorts before 2007.
	foreach v of varlist took_* ap_* above_* {
		replace `v'=. if proj_year<2007
		}
		

***********************************************
* Process 10th grade MCAS scores 
***********************************************	
* First-attempt scaled scores (ELA, then math).
foreach s in e m {
	generate first_`s'scaleds10 = `s'scaleds10
}

*CODE A DUMMY FOR PASSING THRESHOLD IN FIRST TRY *
local type "first "

* Scaled-score cutoffs: needs improvement, proficient, advanced.
local cut_ni    220
local cut_profi 240
local cut_adv   260

* Joint (ELA and math) indicators, created in the order ni, pass, profi, adv.
*   ni / profi / adv : both scores at or above the cutoff, cohorts 2003-2020
*   pass (competency determination): both >= 220 for cohorts 2003-2009,
*                                    both >= 240 for cohorts 2010-2020
* Each is set to missing when either score or proj_year12 is missing.
foreach t in `type' {
	foreach lvl in ni pass profi adv {
		if "`lvl'" == "pass" {
			local lowcut 220
			generate `t'_pass = ((`t'_escaleds10 >= 220 & `t'_mscaleds10 >= 220 & inrange(proj_year12, 2003, 2009)) | ///
				(`t'_escaleds10 >= 240 & `t'_mscaleds10 >= 240 & inrange(proj_year12, 2010, 2020)))
		}
		else {
			local lowcut `cut_`lvl''
			generate `t'_`lvl' = (`t'_escaleds10 >= `lowcut' & `t'_mscaleds10 >= `lowcut' & inrange(proj_year12, 2003, 2020))
		}
		replace `t'_`lvl' = 0 if (`t'_escaleds10 < `lowcut' | `t'_mscaleds10 < `lowcut')
		replace `t'_`lvl' = . if `t'_escaleds10 == . | `t'_mscaleds10 == .
		replace `t'_`lvl' = . if proj_year12 == .
	}
}

*GENERATE ELA AND MATH FIRST-ATTEMPTS DUMMIES *
* Same definitions as above, applied to one subject at a time.
foreach x in e m {
	foreach t in `type' {
		foreach lvl in ni pass profi adv {
			if "`lvl'" == "pass" {
				local lowcut 220
				generate `x'_`t'_pass = ((`t'_`x'scaleds10 >= 220 & inrange(proj_year12, 2003, 2009)) | ///
					(`t'_`x'scaleds10 >= 240 & inrange(proj_year12, 2010, 2020)))
			}
			else {
				local lowcut `cut_`lvl''
				generate `x'_`t'_`lvl' = (`t'_`x'scaleds10 >= `lowcut' & inrange(proj_year12, 2003, 2020))
			}
			replace `x'_`t'_`lvl' = 0 if (`t'_`x'scaleds10 < `lowcut')
			replace `x'_`t'_`lvl' = . if `t'_`x'scaleds10 == .
			replace `x'_`t'_`lvl' = . if proj_year12 == .
		}
	}
}

*CODE A DUMMY FOR ADAMS SCHOLARSHIP ELIGIBILITY * 
generate first_combined = first_escaleds10 + first_mscaleds10

* Three conditions, held in temporary variables:
*   both_prof : both scaled scores >= 240
*   one_adv   : at least one scaled score >= 260
*   meets_cut : combined first-attempt score at or above the cohort's cutoff
tempvar both_prof one_adv meets_cut

generate `both_prof' = (escaleds10 >= 240 & mscaleds10 >= 240 & mscaleds10 ~= . & escaleds10 ~= .)
replace `both_prof' = . if escaleds10 == . | mscaleds10 == .
replace `both_prof' = . if proj_year12 == .

generate `one_adv' = ((escaleds10 >= 260 | mscaleds10 >= 260) & mscaleds10 ~= . & escaleds10 ~= .)
replace `one_adv' = . if escaleds10 == . | mscaleds10 == .
replace `one_adv' = . if proj_year12 == .

* Combined-score cutoffs for cohorts 2005 through 2015, in year order.
generate `meets_cut' = 0
local yr = 2005
foreach cut in 502 502 504 512 510 516 516 516 516 516 520 {
	replace `meets_cut' = 1 if first_combined >= `cut' & proj_year12 == `yr' & first_combined ~= .
	local ++yr
}
replace `meets_cut' = . if first_combined == .
replace `meets_cut' = . if proj_year12 == .

		 	 //based on BPS -- recode with schools? 
* Eligible only when all three conditions hold; missing when all three are missing.
generate adams_elgh = (`both_prof' == 1 & `one_adv' == 1 & `meets_cut' == 1)
replace adams_elgh = . if `both_prof' == . & `one_adv' == . & `meets_cut' == .
drop `both_prof' `one_adv' `meets_cut'


*LABEL MCAS OUTCOMES *
* Scores first, then the threshold dummies, then Adams eligibility.
label variable first_escaleds10 "ELA scaled score at first taking"
label variable first_mscaleds10 "Math scaled score at first taking"
label variable first_ni         "a dummy for passing NI"
label variable first_pass       "a dummy for passing the threshold for the first time"
label variable first_profi      "a dummy for passing proficiency of 240"
label variable first_adv        "a dummy for passing proficiency of 260"
label variable adams_elgh       "eligible for adams using BPS cutoff"

*UPDATE THIS WHEN ADD NEW DATA !!!!!!!!!
		*Limit some outcomes to appropriate years
		* Pattern used throughout: a missing outcome becomes 0 for cohorts the
		* data can cover, and the outcome is set to missing for cohorts it cannot.

		*SAT/AP should be limited to those who reach projected senior year in year of data
		* took_* also covers took_sat*. The last AP/SAT update is from 2019.
		* NOTE(review): the cutoff below blanks proj_year12<=2007, i.e. the class
		* of 2007 as well, while the SAT/AP sections earlier blank proj_year<2007
		* -- confirm which is intended.
		foreach v of varlist took_* N_aps_taken above* {
			replace `v' = 0 if `v' == . & proj_year12 <= 2019 //update when process AP/SAT -- last update is from 2019
			replace `v' = . if proj_year12 > 2019 | proj_year12 <= 2007 //SAT and AP outcomes not available before class of 2007
		}

		*HS grad must have SIMS of relevant spring
		* Last usable cohort: 2019 for 4-year, 2018 for 5-year, 2017 for 6-year graduation.
		foreach k in 4 5 6 {
			local lastclass = 2023 - `k'
			replace hsgrad_`k'yr = 0 if hsgrad_`k'yr == . & proj_year12 <= `lastclass' //update when process sims
			replace hsgrad_`k'yr = . if proj_year12 > `lastclass'
		}
		
		*NSC -- each year fills in a little more but, must have graduating class at the very least
		* For NSC year k (Y1..Y8): a missing outcome becomes 0 for cohorts up to the
		* last usable class and is set to missing for later cohorts.
		* Last usable class: 2020 for Y1-Y4, then 2019, 2018, 2017, 2016 for Y5-Y8.
		* Attendance (att*) variables are handled for every k; completion (cmp*)
		* variables only from Y3 onward.
		forvalues k = 1/8 {
			local lastclass = cond(`k' <= 4, 2020, 2024 - `k')
			local patterns att*Y`k'*
			if `k' >= 3 {
				local patterns `patterns' cmp*Y`k'*
			}
			foreach v of varlist `patterns' {
				replace `v' = 0 if `v' == . & proj_year12 <= `lastclass'
				replace `v' = . if proj_year12 > `lastclass'
			}
		}

	
	//DECIDE what is right here!!
	*0      1  2  3  4  5  6  7  8
	*2011	12 13 14 15 16 17 18 19
	*2012   13 14 15 16 17 18 19 20
	*2013   14 15 16 17 18 19 20 21
	*2014   15 16 17 18 19 20 21 22
	*2015   16 17 18 19 20 21 22 23
	*2016   17 18 19 20 21 22 23 24
	*2017	18 19 20 21 22 23 24
	*2018	19 20 21 22 23 24
	*2019	20 21 22 23 24
	


	
*college persistence
* For each horizon n = 4..7, defined only where att_any_byY`n' is nonmissing:
*   colyrs_4yr_byY`n', colyrs_2yr_byY`n'
*       years enrolled at 4-year / 2-year institutions through college year n;
*       each fall or spring term indicator counts as half a year.
*   persist_byY`n'
*       1 if a BA is completed by year n, or at least 4 years were spent at a
*       4-year school, or at least 2 years at each of a 2-year and a 4-year
*       school; 0 otherwise.
foreach n of numlist 4/7 {
	g persist_byY`n' = 0 if att_any_byY`n'!=.
	foreach type in 4yr 2yr {
		* the total must be cumulative: add the term indicators of EVERY year
		* 1..n, so that e.g. the by-year-6 total includes year 5
		local terms 0
		forvalues y = 1/`n' {
			local terms `terms' + att_`type'_inY`y'fall + att_`type'_inY`y'spring
		}
		g colyrs_`type'_byY`n' = (`terms')/2 if att_any_byY`n'!=.
	}
	replace persist_byY`n' = 1 if cmp_BA_byY`n'==1 & att_any_byY`n'!=.
	* missing compares greater than any number in Stata, so each >= threshold
	* is paired with an explicit "< ." guard
	replace persist_byY`n' = 1 if colyrs_4yr_byY`n'>=4 & colyrs_4yr_byY`n'<. & att_any_byY`n'!=.
	replace persist_byY`n' = 1 if colyrs_2yr_byY`n'>=2 & colyrs_2yr_byY`n'<. ///
		& colyrs_4yr_byY`n'>=2 & colyrs_4yr_byY`n'<. & att_any_byY`n'!=.
}
******************************************************
		******************Voting***************
******************************************************

	
* turn18 = date of the 18th birthday, built from the baseline date of birth
g turn18 = mdy(month(baselinedob), day(baselinedob), year(baselinedob) + 18)
* mdy() returns missing for 29 February in a non-leap year, and (leap year + 18)
* is never a leap year, so leap-day births would otherwise get a missing turn18
* and be coded as "not yet 18" at every election. Treat them as turning 18 on 1 March.
replace turn18 = mdy(3, 1, year(baselinedob) + 18) if month(baselinedob)==2 & day(baselinedob)==29
format turn18 %d
format baselinedob %d

* atleast18_<date> = 1 if the 18th birthday falls on or before that election day.
* A missing date of birth gives a missing turn18, and the comparison then evaluates to 0.
foreach  n  in 8nov2016 4nov2014 6nov2012 2nov2010 4nov2008 7nov2006  {
	gen atleast18_`n'=(turn18 - td(`n'))<=0
	}

* atleast18 = eligible by the 8 November 2016 election
gen atleast18=(turn18 - td(8nov2016))<=0



*proportion variables
* prop_pres / prop_gen = share of presidential / general elections voted in.
* Each starts as the 2016 indicator for everyone at least 18 by 8nov2016, and is
* then overwritten, for people already 18 at an earlier election, by the row mean
* over all elections from that one up to 2016 (rowmean ignores missing values).
* Working back in time means the earliest election a person was 18 for wins.

g prop_pres = (vote_gen_2016==1) if atleast18==1
local ballots vote_gen_2016
local years 2012 2008
local cutoffs 6nov2012 4nov2008
forvalues i = 1/2 {
	local yr : word `i' of `years'
	local cut : word `i' of `cutoffs'
	local ballots `ballots' vote_gen_`yr'
	egen _propshare = rowmean(`ballots') if atleast18_`cut'==1
	replace prop_pres = _propshare if atleast18_`cut'==1
	drop _propshare
}

g prop_gen = (vote_gen_2016==1) if atleast18==1
local ballots vote_gen_2016
local years 2014 2012 2010 2008 2006
local cutoffs 4nov2014 6nov2012 2nov2010 4nov2008 7nov2006
forvalues i = 1/5 {
	local yr : word `i' of `years'
	local cut : word `i' of `cutoffs'
	local ballots `ballots' vote_gen_`yr'
	egen _propshare = rowmean(`ballots') if atleast18_`cut'==1
	replace prop_gen = _propshare if atleast18_`cut'==1
	drop _propshare
}
	

* voter_sample = 1 when every condition below holds, 0 otherwise
* NOTE(review): the window mixes proj_year (lower bound) and proj_year12 (upper bound) -- confirm both are intended
g voter_sample = baselinemasscode!=. & hasbaselinedemos==1 & inbostonbaseline==1 ///
	& boston_sample==1 & proj_year>=2006 & proj_year12<=2017 & atleast18==1

*other outcomes

*winsorize attendance
* cap every totattend* variable at 200
foreach days of varlist totattend* {
	replace `days' = min(`days', 200) if `days'!=.
}

* total days attended across grades 9-12 (missing if any grade is missing)
g totattendhs = totattend9 + totattend10 + totattend11 + totattend12

* present`g' = 1 when a school code is recorded for grade g
forvalues gr = 9/12 {
	g present`gr' = (masscode`gr'!=.)
}
* presenths = 1 only when a school code is recorded in all four grades
g presenths = (present9 + present10 + present11 + present12 == 4)

*Convert attendance days to attendance rates
* Denominator per grade: the largest attend value observed within the same
* school code, forced to 180 whenever charter`g' is not 1 or that maximum is below 180
forvalues gr = 9/12 {
	bysort masscode`gr': egen max`gr' = max(attend`gr')
	replace max`gr' = 180 if charter`gr'!=1 | max`gr'<180
	g attendrate`gr' = attend`gr'/max`gr'
}
g denom = max9 + max10 + max11 + max12 if presenths==1
g attendratehs = totattendhs/denom if presenths==1


	***************
	* SSDR 
	***************
	* Re-index the year-stamped SSDR measures by grade: for each grade 4-12 the
	* new variable takes the value of the year-stamped measure for whichever
	* year (2004-2019) equals year<grade>; it stays missing when no year matches.
	foreach measure in day_suspension inschsus_sum outschsus_sum {
		forvalues grade = 4/12 {
			g `measure'`grade' = .
			forvalues yr = 2004/2019 {
				replace `measure'`grade' = `measure'`yr' if year`grade'==`yr'
			}
		}
	}

	* the year-stamped originals are no longer needed
	drop day_suspension20* inschsus_sum20* outschsus_sum20*
	
	*SRC edited 7/11/2024 to include grade of application since that is the entry grade
	* For each measure, rowtot<g+1>_<measure> sums the grade-indexed values from
	* application grade g through grade 12 (rowtotal treats missing as 0), and
	* <measure>_postlotto picks the total that matches the applicant's gradeapp.
	* Only gradeapp 4, 5, 6, 7 and 9 are handled; any other value leaves
	* <measure>_postlotto missing.
	foreach measure in day_suspension outschsus_sum inschsus_sum {
		foreach appgrade in 4 5 6 7 9 {
			local tag = `appgrade' + 1
			egen rowtot`tag'_`measure' = rowtotal(`measure'`appgrade'-`measure'12)
		}
		g `measure'_postlotto = .
		foreach appgrade in 4 5 6 7 9 {
			local tag = `appgrade' + 1
			replace `measure'_postlotto = rowtot`tag'_`measure' if gradeapp==`appgrade'
		}
	}
	
* ever_* = 1 if the matching post-lottery total is positive, 0 if it is zero.
* Stata treats missing as larger than any number, so a bare "> 0" test is true
* for a missing total; the !missing() qualifier keeps those rows missing
* instead of flagging them as suspended.
gen ever_suspension_postlotto = (day_suspension_postlotto > 0) if !missing(day_suspension_postlotto)
gen ever_outschsus_postlotto  = (outschsus_sum_postlotto > 0)  if !missing(outschsus_sum_postlotto)
gen ever_inschsus_postlotto   = (inschsus_sum_postlotto > 0)   if !missing(inschsus_sum_postlotto)

* drop every variable whose name ends in one of the three measure names
* (this removes the rowtot*_ helper totals)
drop *day_suspension *outschsus_sum *inschsus_sum

* dif_<date> = days from each presidential election day to the 18th birthday
* (<= 0 means already 18 on election day; missing when turn18 is missing)
local pres_elections 8nov2016 6nov2012 4nov2008
foreach eday of local pres_elections {
	gen dif_`eday' = turn18 - td(`eday')
}
*years of education	
* yearsbyfirst: years of education, assigned separately for each projected
* graduating class. The date in each trailing comment is the election the rule
* is keyed to (presumably the first presidential election the cohort could
* vote in -- confirm). Within a class the replaces run from lowest to highest
* attainment, so the last matching line wins.
*   10 + present11 + present12 : did not graduate high school
*   12                         : high school graduate
*   13 / 14                    : some college (any / 4-year), or AA = 14
*   16                         : BA
* NOTE(review): the "dif_... > 0" (YOUNG) tests are also true when turn18 is
* missing, so people without a date of birth fall in the YOUNG branches.
g yearsbyfirst=.

*Projected class of 2006
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2006 // 4nov2008 *high school grad
replace yearsbyfirst = 10 +present11+present12 if hsgrad_6yr==0 &proj_year==2006   // 4nov2008 * non high school grad
replace yearsbyfirst = 13 if att_any_byY2fall==1  &proj_year==2006 // 4nov2008  *some college
replace yearsbyfirst = 14 if att_4yr_byY2fall==1  &proj_year==2006 // 4nov2008  *some college (4yr)

*Projected class of 2007
replace yearsbyfirst = 12 if hsgrad_5yr==1 &proj_year==2007 // 4nov2008 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_5yr==0 &proj_year==2007   // 4nov2008 * non high school grad
replace yearsbyfirst = 13 if att_any_byY1==1 & proj_year==2007 // 4nov2008 *some college

*Projected class of 2008 OLD
replace yearsbyfirst = 12 if hsgrad_4yr==1 &proj_year==2008  &dif_4nov2008<=0 // 4nov2008 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_4yr==0 &proj_year==2008   &dif_4nov2008<=0 // 4nov2008 * non high school grad
replace yearsbyfirst = 12 if att_any_byY1==1 &proj_year==2008   &dif_4nov2008<=0 // 4nov2008 *count as high school grad if attending college

*Projected class of 2008 YOUNG
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2008 &dif_4nov2008>0 // 6nov2012 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_6yr==0 &proj_year==2008 &dif_4nov2008>0  // 6nov2012 * non high school grad
replace yearsbyfirst = 13 if att_any_byY4fall==1  &proj_year==2008 &dif_4nov2008>0 // 6nov2012  *some college
replace yearsbyfirst = 14 if att_4yr_byY4fall==1  &proj_year==2008 &dif_4nov2008>0 // 6nov2012  *some college (4yr)
replace yearsbyfirst = 14 if cmp_AA_byY4==1  &proj_year==2008 &dif_4nov2008>0 // 6nov2012  *AA
replace yearsbyfirst = 16 if cmp_BA_byY4==1  &proj_year==2008 &dif_4nov2008>0 // 6nov2012  *BA

*Projected class of 2009
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2009 // 6nov2012 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_6yr==0 &proj_year==2009   // 6nov2012 * non high school grad
replace yearsbyfirst = 13 if att_any_byY3fall==1  &proj_year==2009 // 6nov2012  *some college
replace yearsbyfirst = 14 if att_4yr_byY3fall==1  &proj_year==2009 // 6nov2012  *some college (4yr)
replace yearsbyfirst = 14 if cmp_AA_byY3==1  &proj_year==2009 // 6nov2012  *AA

*Projected class of 2010
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2010 // 6nov2012 *high school grad
replace yearsbyfirst = 10 +present11+present12 if hsgrad_6yr==0 &proj_year==2010   // 6nov2012 * non high school grad
replace yearsbyfirst = 13 if att_any_byY2fall==1  &proj_year==2010 // 6nov2012  *some college
replace yearsbyfirst = 14 if att_4yr_byY2fall==1  &proj_year==2010 // 6nov2012  *some college (4yr)

*Projected class of 2011
replace yearsbyfirst = 12 if hsgrad_5yr==1 &proj_year==2011 // 6nov2012 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_5yr==0 &proj_year==2011   // 6nov2012 * non high school grad
replace yearsbyfirst = 13 if att_any_byY1==1 & proj_year==2011 // 6nov2012 *some college


*Projected class of 2012 OLD
replace yearsbyfirst = 12 if hsgrad_4yr==1 &proj_year==2012  &dif_6nov2012<=0 // 6nov2012 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_4yr==0 &proj_year==2012   &dif_6nov2012<=0 // 6nov2012 * non high school grad
replace yearsbyfirst = 12 if att_any_byY1==1 &proj_year==2012   &dif_6nov2012<=0 // 6nov2012 *count as high school grad if attending college


*edited below but not above. NEed to split class of 2012 into young and old
*Projected class of 2012 YOUNG
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2012 &dif_6nov2012>0 // 8nov2016 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_6yr==0 &proj_year==2012  &dif_6nov2012>0 // 8nov2016 * non high school grad
replace yearsbyfirst = 13 if att_any_byY5fall==1  &proj_year==2012 &dif_6nov2012>0 // 8nov2016  *some college
replace yearsbyfirst = 14 if att_4yr_byY5fall==1  &proj_year==2012&dif_6nov2012>0 // 8nov2016  *some college (4yr)
* explicit ==1: a bare variable used as a condition is true for any nonzero
* value, including missing
replace yearsbyfirst = 14 if cmp_AA_byY5==1  &proj_year==2012&dif_6nov2012>0 // 8nov2016  *AA
replace yearsbyfirst = 16 if cmp_BA_byY5==1  &proj_year==2012 &dif_6nov2012>0 // 8nov2016  *BA

*Projected class of 2013
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2013 // 8nov2016 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_6yr==0 &proj_year==2013   // 8nov2016 * non high school grad
replace yearsbyfirst = 13 if att_any_byY4fall==1  &proj_year==2013 // 8nov2016  *some college
replace yearsbyfirst = 14 if att_4yr_byY4fall==1  &proj_year==2013 // 8nov2016  *some college (4yr)
replace yearsbyfirst = 14 if cmp_AA_byY4==1  &proj_year==2013 // 8nov2016  *AA
replace yearsbyfirst = 16 if cmp_BA_byY4==1  &proj_year==2013 // 8nov2016  *BA

*Projected class of 2014
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2014 // 8nov2016 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_6yr==0 &proj_year==2014   // 8nov2016 * non high school grad
replace yearsbyfirst = 13 if att_any_byY3fall==1  &proj_year==2014 // 8nov2016  *some college
replace yearsbyfirst = 14 if att_4yr_byY3fall==1  &proj_year==2014 // 8nov2016  *some college (4yr)
replace yearsbyfirst = 14 if cmp_AA_byY3==1  &proj_year==2014 // 8nov2016  *AA

*Projected class of 2015
replace yearsbyfirst = 12 if hsgrad_6yr==1 &proj_year==2015 // 8nov2016 *high school grad
replace yearsbyfirst = 10 +present11+present12 if hsgrad_6yr==0 &proj_year==2015   // 8nov2016 * non high school grad
replace yearsbyfirst = 13 if att_any_byY2fall==1  &proj_year==2015 // 8nov2016  *some college
replace yearsbyfirst = 14 if att_4yr_byY2fall==1  &proj_year==2015 // 8nov2016  *some college (4yr)

*Projected class of 2016
replace yearsbyfirst = 12 if hsgrad_5yr==1 &proj_year==2016 // 8nov2016 *high school grad
replace yearsbyfirst =10 +present11+present12  if hsgrad_5yr==0 &proj_year==2016   // 8nov2016 * non high school grad
replace yearsbyfirst = 13 if att_any_byY1==1 & proj_year==2016 // 8nov2016 *some college

*Projected class of 2017
replace yearsbyfirst = 12 if hsgrad_4yr==1 &proj_year==2017 // 8nov2016 *high school grad
replace yearsbyfirst = 10 +present11+present12  if hsgrad_4yr==0 &proj_year==2017   // 8nov2016 * non high school grad
replace yearsbyfirst = 12 if att_any_byY1==1 &proj_year==2017 // 8nov2016 *count as high school grad if attending  college

*yearsever
* Highest attainment reached by year 6, checked from the top down:
*   16  BA completed
*   14  AA completed, or any 4-year attendance
*   13  any 2-year attendance
*   12  high school graduate only
*   10 + present11 + present12  never graduate (assume stay in school until 16)
*   missing when none of the above can be determined
g yearsever = cond(cmp_BA_byY6==1, 16, ///
	cond(cmp_AA_byY6==1 | att_4yr_byY6==1, 14, ///
	cond(att_2yr_byY6==1, 13, ///
	cond(hsgrad_6yr==1, 12, ///
	cond(hsgrad_6yr==0, 10 + present11 + present12, .)))))

* the election-day difference helpers are no longer needed
drop dif_*nov*
	

/*
. su yearsever if initial_offer_boston==0&waitlist_offer_boston==0

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
   yearsever |      4,274    13.27656    1.927853         10         16

. su yearsever if initial_offer_boston==1|waitlist_offer_boston==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
   yearsever |      8,374    13.28648    1.903931         10         16
*/

compress

* Final variable labels. first_pass and adams_elgh were labelled earlier in the
* file; the labels set here replace those.

* --- test scores ---
label variable twoyearsout_m    "Math score two years after lottery"
label variable twoyearsout_e    "ELA score two years after lottery"
label variable first_pass       "Pass high school graduation requirement"
label variable adams_elgh       "Qualify for state merit scholarship"
label variable c_state_mrawsc10 "Math"
label variable c_state_erawsc10 "ELA"
label variable two_adv_e        "Score Advanced ELA"
label variable two_adv_m        "Score Advanced Math"
label variable two_prof_e       "Score Proficient+ ELA"
label variable two_prof_m       "Score Proficient+ Math"

* --- AP / SAT ---
label variable took_any         "Took any AP"
label variable N_aps_taken      "Number of APs"
label variable above_s2_any     "Score 2+ on any AP"
label variable above_s3_any     "Score 3+ on any AP"
label variable above_s4_any     "Score 4+ on any AP"
label variable above_s5_any     "Score 5 on any AP"
label variable took_sat         "Took SAT"
label variable score_satreason  "SAT score (1600) (for takers)"

* --- high school progression and graduation ---
forvalues gr = 10/12 {
	label variable ontime`gr' "`gr'th grade"
}
label variable repeat12         "Repeat 12th grade"
forvalues horizon = 4/6 {
	label variable hsgrad_`horizon'yr "Graduate high school (`horizon' years)"
}

* --- college attendance (years 1-7) and completion (years 4-7) ---
local ordinals 1st 2nd 3rd 4th 5th 6th 7th
forvalues yr = 1/7 {
	local ord : word `yr' of `ordinals'
	label variable att_any_inY`yr' "`ord' year"
	if `yr' >= 4 {
		label variable cmp_any_byY`yr' "`ord' year"
	}
}

* --- voting ---
label variable ever_registered  "Ever registered to vote"
label variable first_pos_gen    "General election"
label variable first_pos_pres   "Presidential election"
label variable ever_voted       "In MA"
label variable ever_general     "In MA general election"
label variable ever_primary     "In MA primary election"
label variable registered_by_19 "Registered to vote by 19th birthday"

* --- discipline ---
label variable inschsus_sum_postlotto    "Number of in-school suspensions"
label variable outschsus_sum_postlotto   "Number of out-of-school suspensions"
label variable day_suspension_postlotto  "Suspension days"
label variable ever_suspension_postlotto "Any suspension"
label variable ever_outschsus_postlotto  "Out-of-school suspension"
label variable ever_inschsus_postlotto   "In-school suspension"
		
	* rename every variable ending in _nonpres so that it ends in _gen_offcycle
	ren *_nonpres *_gen_offcycle

		* Write the analysis file, overwriting any previous copy. The path uses a
		* forward slash, which Stata accepts on every platform; a backslash is not
		* a directory separator outside Windows and doubles as Stata's
		* macro-escape character.
		save "$data/baseanalysisfile_voter_lotto.dta", replace
